/*
Revisit? Use the full dataset (all patents) to assign first-patentee status.
Use the patents merged to cities to make the correct counts.
*/

clear

// =====================
// Full Patents information
// =====================
// Builds the inventor-level file saved in `allpats': one row per
// (patnum, inv_name) for non-international records, carrying
//   counter   - row number of the record in the source CSV
//   first_inv - 1 for the first-listed row of each patent, 0 otherwise
//   n_invs    - number of rows the patent has in the source CSV
// Forward slashes are used in paths because Stata accepts them on every
// platform, whereas backslashes only work on Windows.

* The CSV is read in two passes (columns 1-2, then 4-9; column 3 is never
* read) and the two halves are re-joined on the row number.
import 	delimited "./input/CUSP/patents_inventor_name_location_from_1900.csv", colr(1:2)
gen 	counter = _n

tempfile allpats
save	"`allpats'", replace
clear

import 	delimited "./input/CUSP/patents_inventor_name_location_from_1900.csv", colr(4:9)
gen 	counter = _n

merge 1:1 counter using "`allpats'"
drop	_merge 

** Set up first author info **
* Ordering on counter inside each patent keeps the file order, so the first
* row of every patnum group is the first-listed inventor. This also flags
* exactly one row per patent even if a patent's rows are not contiguous.
bysort patnum (counter): gen byte first_inv = (_n == 1)

** Calculate total number of authors **
* Counted before international rows and duplicates are removed below, so
* n_invs reflects every row the patent has in the source file.
bysort patnum (counter): egen n_invs = count(patnum)

** Check if "-999" for both inv_lat and inv_long correspond to international (appears it does)
gen		international = 1 if inv_lat == -999 & inv_long == -999 & inv_country != "us"  				
drop 	if international == 1       	// Drop international patents
drop	international inv_lat inv_long inv_county inv_fips inv_state inv_country 	// Don't need these

compress

* duplicates drop keeps the first occurrence in the current order; sorting on
* counter makes that the earliest CSV row, so the result is reproducible.
sort 	patnum counter
duplicates drop patnum inv_name, force

save	"`allpats'", replace
clear

// =====================
// Prep Assignee Dataset
// =====================
// Builds `assg': one row per distinct value of the first CSV column, each
// flagged pat_has_assignee = 1. Only that first column is read; the later
// m:1 merge uses it under the name patnum.

* Forward slashes: accepted by Stata on every platform (backslashes are not).
import 	delimited "./input/CUSP/patents_assignee_name_location_from_1900.csv", colr(1:1)

* With a single column loaded, this leaves one row per distinct value.
duplicates drop

gen	byte	pat_has_assignee = 1

compress
tempfile assg
save	"`assg'", replace
clear

// =====================
// Prep Merged NPI_city Inventor Dataset
// =====================
// Loads the inventor records assigned to NPI cities and keeps only those
// that also appear in `allpats', picking up first_inv, n_invs and counter
// from that file.

* The CSV is read in two passes (columns 2-3, then 5-16; columns 1 and 4 are
* never read) and the two halves are re-joined on the row number.
* Forward slashes: accepted by Stata on every platform (backslashes are not).
import 	delimited "./input/CUSP/inv_patents_assigned_to_npi_cities.csv", colr(2:3)
gen 	counter = _n

tempfile pnums
save	"`pnums'", replace
clear

import 	delimited "./input/CUSP/inv_patents_assigned_to_npi_cities.csv", colr(5:16)
gen 	counter = _n

merge 1:1 counter using "`pnums'"
* The local row number is only needed for the join; dropping it here means
* the counter kept after the next merge is the one stored in `allpats'.
drop	_merge counter

duplicates drop patnum inv_name, force // Only a single patent, misclassified

compress

* Keep only (patnum, inv_name) pairs present in both files.
merge 1:1 patnum inv_name using "`allpats'"
keep if _merge==3
drop	_merge

** Clean up, assignee info, and common definitions ** 
* Produces `patentcounts': one row per matched city inventor record with the
* assignee flag and the per-record counting weights used in later collapses.
keep	patnum counter npi_id first_inv n_invs

* Attach the assignee flag; rows found only in the assignee file are dropped.
merge m:1 patnum using "`assg'"
drop if _merge==2
drop	_merge

* Patents absent from the assignee file have no assignee.
replace	pat_has_assignee = 0 if mi(pat_has_assignee)

gen	byte	pat_first_inv = (first_inv==1)		// First inventor only
gen byte	pat_all_inv = 1 					// Each inventor counts as one 		
* Stored as double: a float 1/3 is not exact enough for the shares of one
* patent to add back up to 1 once they are summed.
gen double	pat_wtd_inv = 1/n_invs 		// Split weights across inventors

gen	byte	pat_single_inv = (n_invs==1)

drop 	first_inv n_invs

compress

tempfile patentcounts
save	"`patentcounts'", replace

clear

// =================
// Prep Date Dataset
// =================
// Loads patent dates, restricts filing years to 1900-2010 with a known
// filing month, and renames {i,f,p}year / {i,f,p}month to ?_yr / ?_m.

* Forward slashes: accepted by Stata on every platform (backslashes are not).
import	delimited "./input/CUSP/patents_fyear_iyear_from_1900.csv"

drop	if fyear < 1900					// Drop observations before 1900
* Missing values sort above every number in Stata, so they are excluded here
* explicitly and dropped on their own line to keep the two counts separate
* in the log. Unlike an upper cut-off at 2020, this removes every year after
* 2010.
drop	if fyear > 2010 & !mi(fyear)	// Drop observations since 2011
drop	if mi(fyear) 					// 9515 patents without discernable filing year
drop 	if mi(fmonth)					// 20403 patents without discernable filing month

foreach a in i f p {
	rename `a'year `a'_yr
	rename `a'month `a'_m
}

keep 	patnum ?_yr ?_m	

* Report the number of 1910 filings. count leaves the data untouched, so no
* preserve/restore is needed, and it reports 0 rather than missing when
* there are none.
count if f_yr == 1910
local pats1910 = r(N)
di "Total contain `pats1910' 1910 US Patent filings"

compress

// =========================================================
// Combine Dates/Inventors (collapses by Month-Year-City follow)
// =========================================================
// The date file in memory (master) has one row per patnum; `patentcounts'
// (using) has one row per city inventor record.

merge	1:m patnum using "`patentcounts'"
/* _merge=1 (dates only): patent has no inventor record in `patentcounts'
   _merge=2 (`patentcounts' only): patent is not in the filtered date file,
            i.e. its filing date is outside the range or missing */
keep if _merge==3
drop 	_merge   

compress

* Inventor-record-level list, saved before any aggregation.
save 	"./output/intermediate/cities_patents_list", replace
*use 	"./output/intermediate/cities_patents_list", clear
** Weighted number of single and multiple-inventor patents **

* Single-inventor patents per filing month, filing year and npi_id, split by
* assignee status. Saved to `singlepats' with pat_single_inv (total),
* pat_single_inv_noassg and pat_single_inv_wassg.
preserve
	keep if pat_single_inv==1
	keep pat_single_inv f_m f_yr npi_id pat_has_assignee
	collapse (sum) pat_single_inv, by(f_m f_yr npi_id pat_has_assignee)
	
	reshape wide pat_single_inv, i(f_m f_yr npi_id) j(pat_has_assignee)
	
	* reshape only creates a column for assignee categories present in the
	* data; create a missing one as zeros so the steps below cannot fail.
	foreach j in 0 1 {
		capture confirm variable pat_single_inv`j', exact
		if _rc {
			gen double pat_single_inv`j' = 0
		}
		* Cells with no patents in this category come out of reshape as missing.
		replace pat_single_inv`j' = 0 if mi(pat_single_inv`j')
	}
	
	gen		pat_single_inv = pat_single_inv0 + pat_single_inv1
	rename	pat_single_inv0 pat_single_inv_noassg 
	rename	pat_single_inv1 pat_single_inv_wassg 
	
	tempfile singlepats
	save "`singlepats'", replace
restore  
  
* Multi-inventor patents per filing month, filing year and npi_id, split by
* assignee status. Saved to `multipats' with
*   pat_multinvs_inv*  - inventor records, each counted as 1
*   pat_multpats_inv*  - inventor records, each counted as 1/n_invs
* in total, _noassg and _wassg versions.
preserve
	keep if pat_single_inv!=1
	keep pat_all_inv pat_wtd_inv f_m f_yr npi_id pat_has_assignee
	collapse (sum) pat_all_inv pat_wtd_inv, by(f_m f_yr npi_id pat_has_assignee)
	
	reshape wide pat_all_inv pat_wtd_inv, i(f_m f_yr npi_id) j(pat_has_assignee)
	
	foreach v in pat_all_inv pat_wtd_inv {
		* reshape only creates columns for assignee categories present in the
		* data; create a missing one as zeros so the steps below cannot fail.
		foreach j in 0 1 {
			capture confirm variable `v'`j', exact
			if _rc {
				gen double `v'`j' = 0
			}
			* Cells with no records in this category come out as missing.
			replace `v'`j' = 0 if mi(`v'`j')
		}
		* double: the weighted parts are fractional, and a float total would
		* not equal the sum of its two components.
		gen	double	`v' = `v'0 + `v'1
		rename	`v'0 `v'_noassg 
		rename	`v'1 `v'_wassg 
	}
	
	* Rename so these do not clash with the all-patent pat_all_inv* and
	* pat_wtd_inv* variables they are merged with later.
	rename *_all_* *_multinvs_*
	rename *_wtd_* *_multpats_*
	
	tempfile multipats
	save "`multipats'", replace
restore    
  
* Overall counts per filing month, filing year and npi_id, split by assignee
* status, then combined with the single- and multi-inventor tables.
drop 	pat_single_inv
  
collapse (sum) pat_first_inv pat_all_inv pat_wtd_inv, by(f_m f_yr npi_id pat_has_assignee)

reshape wide pat_first_inv pat_all_inv pat_wtd_inv, i(f_m f_yr npi_id) j(pat_has_assignee)

foreach v in pat_first_inv pat_all_inv pat_wtd_inv {
	* reshape only creates columns for assignee categories present in the
	* data; create a missing one as zeros so the steps below cannot fail.
	foreach j in 0 1 {
		capture confirm variable `v'`j', exact
		if _rc {
			gen double `v'`j' = 0
		}
		* Cells with no records in this category come out as missing.
		replace `v'`j' = 0 if mi(`v'`j')
	}
	* double: the weighted parts are fractional, and a float total would not
	* equal the sum of its two components.
	gen	double	`v' = `v'0 + `v'1
	rename	`v'0 `v'_noassg 
	rename	`v'1 `v'_wassg 
}

* Cells missing from either table get missing values for its variables here.
merge 1:1 f_m f_yr npi_id using "`singlepats'"
drop	_merge

merge 1:1 f_m f_yr npi_id using "`multipats'"
drop	_merge

compress

** Build the monthly filing date **
sort 	f_yr f_m npi_id
gen 	f_myr = ym(f_yr, f_m)
format 	f_myr %tm

** Extend and fill in 0s
* Declare an npi_id-by-month panel and add a row for every npi_id in every
* month between the earliest and latest month present in the data.
xtset 	npi_id f_myr	
tsfill, full

* The year and month parts are missing on the rows tsfill added; f_myr
* carries the date from here on.
drop 	f_yr f_m
order 	npi_id f_myr, first

* Rows added by tsfill, and cells absent from one of the merged tables, have
* missing counts; they represent zero patents.
foreach v of varlist pat_* {
	replace `v' = 0 if mi(`v')
}

* Restrict to January 1900 through December 1929.
keep if inrange(f_myr, tm(1900m1), tm(1929m12))

** Labels! **
* Every measure exists in three versions (total, no assignee, with assignee)
* that share a common description, so the labels are built from a stem plus a
* suffix.

local lbl_pat_first_inv 	"Patents with local first inventor"
local lbl_pat_all_inv 		"Local inventors on patents (each inventor = 1)"
local lbl_pat_wtd_inv 		"Local inventors on patents (each inventor = 1/num of invs)"
local lbl_pat_single_inv 	"Local single-inventor patents"
local lbl_pat_multinvs_inv 	"Local inventors on multi-inventor patents"
local lbl_pat_multpats_inv 	"Local multi-inventor patents (each inventor = 1/num of invs)"

foreach v in pat_first_inv pat_all_inv pat_wtd_inv pat_single_inv pat_multinvs_inv pat_multpats_inv {
	lab var `v' 		"`lbl_`v'' (total)"
	lab var `v'_noassg 	"`lbl_`v'' (no assignee)"
	lab var `v'_wassg 	"`lbl_`v'' (with assignee)"
}

* Final npi_id-by-month panel.
compress 
save 	"./output/intermediate/cities_patents_19001929", replace

clear
